#!/usr/bin/env python
# coding=utf-8
# __author__ = 'Yunchao Ling'

from pymongo import MongoClient
from tqdm import tqdm

# Set of the string labels "1" through "22" (presumably the autosomal
# chromosome names; no "X", "Y" or "MT" entries are included).
# Not referenced in the visible part of this script.
valid_chrom = {str(number) for number in range(1, 23)}

# For every data row of the tab-separated input table, count how many
# documents in the MongoDB collection have an "rsid" equal to any of the
# row's rsIDs, and write one "rsid1<TAB>rsid2<TAB>count" line per row.
#
# NOTE(review): the connection string below embeds a username and password in
# source code; consider loading it from an environment variable or a config
# file that is kept out of version control.
#
# The client and both files are managed by `with` so they are released even
# if a query or a malformed row raises part-way through the run.
with MongoClient("mongodb://han:han2019@mongo-han.handb:27017") as client:
    collection = client["hanvcf"]["snvs_20220612"]

    with open("variation_info_GRCh37_20220705.txt", "r") as infile, \
            open("rsid_count.tsv_20220705.tsv", "w") as outfile:
        outfile.write("rsid1\trsid2\tcount\n")
        outfile.flush()

        # Discard the first input line (presumably a column header).
        infile.readline()

        for line in tqdm(infile):
            line = line.rstrip("\n")
            if not line:
                # Blank lines (e.g. a trailing empty line at end of file)
                # carry no rsIDs; skip them instead of failing on column 1.
                continue

            splitline = line.split("\t")
            rsid1 = splitline[0]
            rsid2 = splitline[1]

            # Column 0 may hold several rsIDs joined by "|"; column 1 holds
            # one more rsID unless it is the placeholder "NA".
            rsid_list = rsid1.split("|")
            if rsid2 != "NA":
                rsid_list.append(rsid2)
            # De-duplicate so each rsID appears once in the $in query.
            rsid_list = list(set(rsid_list))

            result = collection.count_documents({"rsid": {"$in": rsid_list}})

            outfile.write("%s\t%s\t%d\n" % (rsid1, rsid2, result))
            # Flush after each row so finished rows are handed to the OS
            # immediately rather than sitting in Python's write buffer.
            outfile.flush()
